load libraries
## Warning: package 'dplyr' was built under R version 3.2.2
##
## Attaching package: 'dplyr'
##
## The following objects are masked from 'package:stats':
##
## filter, lag
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
##
## Loading required package: BiocGenerics
## Loading required package: parallel
##
## Attaching package: 'BiocGenerics'
##
## The following objects are masked from 'package:parallel':
##
## clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
## clusterExport, clusterMap, parApply, parCapply, parLapply,
## parLapplyLB, parRapply, parSapply, parSapplyLB
##
## The following objects are masked from 'package:dplyr':
##
## combine, intersect, setdiff, union
##
## The following object is masked from 'package:stats':
##
## xtabs
##
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, as.vector, cbind,
## colnames, do.call, duplicated, eval, evalq, Filter, Find, get,
## intersect, is.unsorted, lapply, Map, mapply, match, mget,
## order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
## rbind, Reduce, rep.int, rownames, sapply, setdiff, sort,
## table, tapply, union, unique, unlist, unsplit
##
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
##
## gdata: read.xls support for 'XLS' (Excel 97-2004) files ENABLED.
##
## gdata: read.xls support for 'XLSX' (Excel 2007+) files ENABLED.
##
## Attaching package: 'gdata'
##
## The following object is masked from 'package:Biobase':
##
## combine
##
## The following object is masked from 'package:BiocGenerics':
##
## combine
##
## The following objects are masked from 'package:dplyr':
##
## combine, first, last
##
## The following object is masked from 'package:stats':
##
## nobs
##
## The following object is masked from 'package:utils':
##
## object.size
load data
load("/Users/siddisis/Dropbox/work/Ribosome_profiling/Primate_comparison/manuscript/data/rdas/eSetRRP.rda")
log2ribo<-exprs(eSetRRP.RP.Q.log2[,eSetRRP.RP.Q.log2$seqData == "ribo"])
log2rna<-exprs(eSetRRP.RP.Q.log2[,eSetRRP.RP.Q.log2$seqData == "rna"])
log2TE <- log2ribo - log2rna
read.xls("/Users/siddisis/Dropbox/work/Ribosome_profiling/Primate_comparison/primate_ribo_gender_mmbatch.xlsx")->cell.source
cell.source<-cell.source[,1:4]
cell.source<-cell.source[-11,]
batch.r<-cell.source$libmix
pca
pc<-prcomp(t(na.omit(log2TE)),center = T,scale = T)
barplot((pc$sdev)^2/sum((pc$sdev)^2), names.arg = colnames(pc$x))
spec<-substring(rownames(pc$x),1,1)
temp.data<-as.data.frame(cbind(melt(pc$x),pc$x[,1],spec,cell.source$libmix,cell.source$sex,cell.source$source.center))
names(temp.data)<-c("ID","PC","value","pc1","species","batch","sex","source")
temp.data %>% ggplot(aes(x=pc1,y=value))+geom_point(aes(color = species))+facet_wrap(~PC)
temp.data %>% ggplot(aes(x=pc1,y=value))+geom_point(aes(color = batch))+facet_wrap(~PC)
temp.data %>% ggplot(aes(x=pc1,y=value))+geom_point(aes(color = sex))+facet_wrap(~PC)
temp.data %>% ggplot(aes(x=pc1,y=value))+geom_point(aes(color = source))+facet_wrap(~PC)
temp.data %>% ggplot(aes(x=pc1,y=value))+geom_point(aes(color = species, shape = batch ))+facet_wrap(~PC)
temp.data %>% ggplot(aes(x=pc1,y=value))+geom_point(aes(color = batch, shape = species ))+facet_wrap(~PC)
use batched corrected ribo data
load("../rdas/HRC.plus.mm4H.ribo.rda")
load("../rdas/HCR.ortho.geneLength.rda")
HRC.plus.mm4H.rpkm.speLength <- prop.table(as.matrix(HRC.plus.mm4H.ribo[,-1]),2)*10^9
HRC.plus.mm4H.rpkm.speLength <- cbind(HRC.plus.mm4H.rpkm.speLength[,1:5]/HCR.geneLength$Chimp,HRC.plus.mm4H.rpkm.speLength[,6:11]/HCR.geneLength$Human,HRC.plus.mm4H.rpkm.speLength[,12:16]/HCR.geneLength$Rhesus,HRC.plus.mm4H.rpkm.speLength[,17:20]/HCR.geneLength$Human)
HRC.plus.mm4H.rpkm.speLength<-cbind(HRC.plus.mm4H.ribo$ENSGID,as.data.frame(HRC.plus.mm4H.rpkm.speLength))
expr.filter<-HRC.plus.mm4H.rpkm.speLength$`HRC.plus.mm4H.ribo$ENSGID` %in% row.names(log2ribo)
HRC.plus.mm4H.rpkm.speLength<-HRC.plus.mm4H.rpkm.speLength[expr.filter,]
library(limma)
##
## Attaching package: 'limma'
##
## The following object is masked from 'package:BiocGenerics':
##
## plotMA
HRC.plus.mm4H.rpkm.speLength.Q<-normalizeQuantiles(HRC.plus.mm4H.rpkm.speLength[,-1],ties = T)
log2.HRC.plus.mm4H.rpkm.speLength.Q<-log2(HRC.plus.mm4H.rpkm.speLength.Q)
is.na(log2.HRC.plus.mm4H.rpkm.speLength.Q)<-is.infinite(as.matrix(log2.HRC.plus.mm4H.rpkm.speLength.Q))
spec<-substring(colnames(log2.HRC.plus.mm4H.rpkm.speLength.Q),1,1)
spec[17:20]<-c("H","H","H","H")
spec.design <- model.matrix(~ 0+spec)
read.xls("/Users/siddisis/Dropbox/work/Ribosome_profiling/Primate_comparison/primate_ribo_gender_mmbatch.xlsx")->cell.source
cell.source<-cell.source[,1:4]
batch.f<-as.factor(c(as.vector(cell.source$libmix),c("mm4","mm4","mm4","mm4")))
#batch.removed.limma.full<-removeBatchEffect(log2.HRC.plus.mm4H.rpkm.speLength.Q,batch = as.factor(batch))
batch.removed.limma.full<-removeBatchEffect(log2.HRC.plus.mm4H.rpkm.speLength.Q,batch = as.factor(batch.f),design = spec.design)
pc<-prcomp(t(na.omit(batch.removed.limma.full)),center = T,scale = T)
#pc<-prcomp(batch.fit$residuals[1:13,],center = T,scale = T)
barplot((pc$sdev)^2/sum((pc$sdev)^2), names.arg = colnames(pc$x))
spec<-substring(rownames(pc$x),1,1)
temp.data<-as.data.frame(cbind(melt(pc$x),pc$x[,1],spec,batch.f))
names(temp.data)<-c("ID","PC","value","pc1","species","batch")
temp.data %>% ggplot(aes(x=pc1,y=value))+geom_point(aes(color = species, shape=batch))+facet_wrap(~PC)
temp.data %>% ggplot(aes(x=pc1,y=value))+geom_point(aes(color = batch))+facet_wrap(~PC)
log2TE.removedBatch <- batch.removed.limma.full[,c(-11,-17:-20)] - log2rna
pc<-prcomp(t(na.omit(log2TE.removedBatch)),center = T,scale = T)
barplot((pc$sdev)^2/sum((pc$sdev)^2), names.arg = colnames(pc$x))
spec<-substring(rownames(pc$x),1,1)
temp.data<-as.data.frame(cbind(melt(pc$x),pc$x[,1],spec,batch.r))
names(temp.data)<-c("ID","PC","value","pc1","species","batch")
temp.data %>% ggplot(aes(x=pc1,y=value))+geom_point(aes(color = species))+facet_wrap(~PC)
temp.data %>% ggplot(aes(x=pc1,y=value))+geom_point(aes(color = batch))+facet_wrap(~PC)
temp.data %>% ggplot(aes(x=pc1,y=value))+geom_point(aes(color = species, shape = batch ))+facet_wrap(~PC)
Btach correct RNA seq data
#library(gdata)
read.xls("/Users/siddisis/Dropbox/work/Ribosome_profiling/Primate_comparison/Jenny_AthmaRNALib.xlsx")->rna.batch
rna.batch<-rna.batch[1:27,1:2]
rna.batch<-gsub("-",".",as.matrix(rna.batch))
rna.batch<-as.data.frame(gsub("\\.","",rna.batch))
sample.names<-sub(".rna","",colnames(log2rna))
rna.batch<-rna.batch[rna.batch$Sample.Name%in%sample.names,]
rna.batch<-rna.batch[order(rna.batch$Sample.Name),]
spec<-substring(colnames(log2rna),1,1)
pc<-prcomp(t(na.omit(log2rna)),center = T,scale = T)
barplot((pc$sdev)^2/sum((pc$sdev)^2), names.arg = colnames(pc$x))
spec<-substring(rownames(pc$x),1,1)
temp.data<-as.data.frame(cbind(melt(pc$x),pc$x[,1],spec,rna.batch$Master.Mix))
#temp.data<-as.data.frame(cbind(melt(pc$x),pc$x[,1],spec,batch.r))
names(temp.data)<-c("ID","PC","value","pc1","species","batch")
temp.data %>% ggplot(aes(x=pc1,y=value))+geom_point(aes(color = batch))+facet_wrap(~PC)
temp.data %>% ggplot(aes(x=pc1,y=value))+geom_point(aes(color = species))+facet_wrap(~PC)
spec.design <- model.matrix(~ 0+spec)
batch.removed.rna<-removeBatchEffect(log2rna,batch = rna.batch$Master.Mix,design = spec.design)
pc<-prcomp(t(na.omit(batch.removed.rna)),center = T,scale = T)
barplot((pc$sdev)^2/sum((pc$sdev)^2), names.arg = colnames(pc$x))
spec<-substring(rownames(pc$x),1,1)
temp.data<-as.data.frame(cbind(melt(pc$x),pc$x[,1],spec,rna.batch$Master.Mix))
#temp.data<-as.data.frame(cbind(melt(pc$x),pc$x[,1],spec,batch.r))
names(temp.data)<-c("ID","PC","value","pc1","species","batch")
temp.data %>% ggplot(aes(x=pc1,y=value))+geom_point(aes(color = batch))+facet_wrap(~PC)
temp.data %>% ggplot(aes(x=pc1,y=value))+geom_point(aes(color = species))+facet_wrap(~PC)
log2TE.removedBatch <- batch.removed.limma.full[,c(-11,-17:-20)] - batch.removed.rna
pc<-prcomp(t(na.omit(log2TE.removedBatch)),center = T,scale = T)
barplot((pc$sdev)^2/sum((pc$sdev)^2), names.arg = colnames(pc$x))
spec<-substring(rownames(pc$x),1,1)
#temp.data<-as.data.frame(cbind(melt(pc$x),pc$x[,1],spec,rna.batch$Master.Mix))
temp.data<-as.data.frame(cbind(melt(pc$x),pc$x[,1],spec,batch.r))
names(temp.data)<-c("ID","PC","value","pc1","species","batch")
temp.data %>% ggplot(aes(x=pc1,y=value))+geom_point(aes(color = batch))+facet_wrap(~PC)
temp.data %>% ggplot(aes(x=pc1,y=value))+geom_point(aes(color = species, shape = batch ))+facet_wrap(~PC)
temp.data %>% ggplot(aes(x=pc1,y=value))+geom_point(aes(color = species))+facet_wrap(~PC)